pathway <- "~/PhD/KM_Piecewise_Major_Review_final/SimStudy-Res/"
#"I'm not sure the simulation study should be completely resigned to the Appendix - can this be detailed in the main body? The scope of the simulation is limited to the data generating mechanism being one the method can handle.
#In the scenarios when it gets the number of changepoints incorrect - how badly does it go wrong?"
list.of.packages <- need<-c("PiecewiseChangepoint", "flexsurv","xlsx", "dplyr", "ggplot2", "sjstats") #needed libraries
#sjstats for bootstrapping
res <- lapply(list.of.packages, require, character.only = TRUE)
not.loaded <- list.of.packages[which(sapply(res, unlist) ==F)]
not.loaded2 <- lapply(not.loaded, require,lib.loc = '/home/cooneph1/R-packages', character.only = TRUE)
not.installed <- not.loaded2[which(sapply(not.loaded2, unlist) ==F)]
#load the packages
if(length(not.installed)) install.packages(not.loaded,lib = '/home/cooneph1/R-packages')
if(length(not.installed)) lapply(not.loaded, require,lib.loc = '/home/cooneph1/R-packages', character.only = TRUE)
sim.study <- function(n_obs,n_events_req,rate,t_change, max_time =2,
n.sims, sims = 10750,burn_in = 750, timescale, lambda, time_seq, right_wrong = F){
param_true <- list()
param_true$lambda <- matrix(rate, nrow = 1)
param_true$changepoint <- matrix(t_change, nrow = 1)
if(any(is.na(t_change))){
num.breaks = 0
}else{
num.breaks <- length(t_change)
}
n.events <- rep(NA, n.sims)
avg.Haz <- array(NA, dim = c(n.sims, 7))
avg.change <- array(NA, dim = c(n.sims, 7))
ISSE_vec <- RMSE_vec <- RSt_err <- matrix(ncol = 8, nrow = n.sims)
mod_param <- c("weibull", "lnorm", "gamma", "gompertz", "llogis", "exponential", "gengamma")
# True_St <- get_Surv(param_true, chng.num = "all", time = time_seq)[,1]
# True_RSt <- sfsmisc::integrate.xy(x = time_seq, True_St)
for(i in 1:n.sims){
print(paste0("Sim ",i," of ",n.sims))
Sys.sleep(1 / n.sims)
n_events <- 0
df <- gen_piece_df(n_obs = n_obs,n_events_req = n_events_req,
num.breaks = num.breaks,rate = rate ,
t_change = t_change, max_time = max_time)
df$status2 <- 1
surv.obj.df <- survfit(Surv(time_event ,status2)~1, data = df)
True_St <- summary(surv.obj.df, t = time_seq)[["surv"]]
time_seq_km <- summary(surv.obj.df, t = time_seq)[["time"]]
#True_St <- get_Surv(param_true, chng.num = "all", time = time_seq)[,1]
True_RSt <- sfsmisc::integrate.xy(x = time_seq_km, True_St)
n_events <- df$status
n.events[i] <- sum(df$status == 1)
#tryCatch({
mod <- collapsing.model(df,n.chains = 1,n.iter = sims, burn_in = burn_in, timescale = timescale )
most_prob <- as.numeric(names(mod$prob.changepoint[which.max(mod$prob.changepoint)]))
# PEM_St <- rowMeans(get_Surv(mod, chng.num = most_prob, time = time_seq_km))
# PEM_RSt <- sfsmisc::integrate.xy(x = time_seq_km, PEM_St)
#
# ISSE_vec[i,1] <- sfsmisc::integrate.xy(x = time_seq_km, fx = (True_St-PEM_St)^2)
# RMSE_vec[i,1] <- (PEM_RSt-True_RSt)^2
# RSt_err[i,1]<- abs(PEM_RSt-True_RSt)/True_RSt
#
# for(b in 1:length(mod_param)){
# fit.parm <- flexsurvreg(formula = Surv(time, status) ~ 1, data = df, dist=mod_param[b])
# mod_St <- summary(fit.parm, t = time_seq_km)[[1]]$est
# mod_RSt <- sfsmisc::integrate.xy(x = time_seq_km, fx = mod_St)
# ISSE_vec[i,1+b] <- sfsmisc::integrate.xy(x = time_seq_km, fx = (True_St-mod_St)^2)
# RMSE_vec[i,1+b] <- (mod_RSt-True_RSt)^2
# RSt_err[i,1+b]<- abs(mod_RSt-True_RSt)/True_RSt
# }
index_prob <- apply(mod$changepoint,1, function(x){length(na.omit(x))==most_prob})
avg.Haz[i,] <- colMeans(mod[["lambda"]][index_prob,])
avg.change[i,] <- colMeans(mod[["changepoint"]][index_prob,])
#}, error = function(e){print("Nuts")})
}
chang.vals <-apply(avg.change,1, function(x){length(na.omit(x))})
uniq.chang <- unique(chang.vals)[order(unique(chang.vals))]
final.chng <- data.frame(mean = rep(NA,length(uniq.chang)),sd = rep(NA,length(uniq.chang)), nsims = rep(NA,length(uniq.chang)))
for(i in 1:length(uniq.chang)){
final.chng[i,1] <- mean(avg.change[which(chang.vals == uniq.chang[i]),uniq.chang[i]], na.rm =T)
final.chng[i,2] <- sd(avg.change[which(chang.vals == uniq.chang[i]),uniq.chang[i]], na.rm =T)
final.chng[i,3] <- table(chang.vals)[i]
}
row.names(final.chng) <- uniq.chang
prob.correct <- mean(chang.vals == num.breaks)
if(length(which(chang.vals == num.breaks)) >1){
if(num.breaks == 0 ){
se.chng <- mean.chng <- NA
mean.haz <- mean(avg.Haz[which(chang.vals == num.breaks),
1:(num.breaks+1)], na.rm = T)
se.haz <- sd(avg.Haz[which(chang.vals == num.breaks),
1:(num.breaks+1)], na.rm = T)
}else if(num.breaks == 1 ){
mean.chng <- mean(avg.change[which(chang.vals == num.breaks),
1:num.breaks], na.rm =T)
se.chng <- sd(avg.change[which(chang.vals == num.breaks),1:num.breaks],na.rm = T)
mean.haz <- colMeans(avg.Haz[which(chang.vals == num.breaks),
1:(num.breaks+1)], na.rm = T)
se.haz <- apply(avg.Haz[which(chang.vals == num.breaks),
1:(num.breaks+1)],2,FUN = sd, na.rm = T)
}else{
mean.chng <- colMeans(avg.change[which(chang.vals == num.breaks),
1:num.breaks], na.rm =T)
se.chng <- apply(avg.change[which(chang.vals == num.breaks),1:num.breaks]
,2,FUN= sd,na.rm = T)
mean.haz <- colMeans(avg.Haz[which(chang.vals == num.breaks),
1:(num.breaks+1)], na.rm = T)
se.haz <- apply(avg.Haz[which(chang.vals == num.breaks),
1:(num.breaks+1)],2,FUN = sd, na.rm = T)
}
}else{
mean.chng<- NA
se.chng <- NA
mean.haz <- NA
se.haz <- NA
}
# RSt_err_correct <- mean(RSt_err[length(t_change) == chang.vals,1])
# RSt_err_wrong <- mean(RSt_err[length(t_change) != chang.vals,1])
#
# ISSE_vec_correct <-mean(ISSE_vec[length(t_change) == chang.vals,1])
# ISSE_vec_wrong <- mean(ISSE_vec[length(t_change) != chang.vals,1])
#
# RMSE_vec_correct <-mean(RMSE_vec[length(t_change) == chang.vals,1])
# RMSE_vec_wrong <- mean(RMSE_vec[length(t_change) != chang.vals,1])
return(list(final.chng = final.chng,prob.correct = prob.correct,
chng = data.frame(mean.chng,se.chng),
haz = data.frame(mean.haz,se.haz),
prob.mod = chang.vals#,
# ISSE_vec = ISSE_vec,
# RMSE_vec = RMSE_vec,
# RSt_err = RSt_err,
# RSt_err_correct = RSt_err_correct,
# RSt_err_wrong = RSt_err_wrong,
# ISSE_vec_correct = ISSE_vec_correct,
# ISSE_vec_wrong = ISSE_vec_wrong,
# RMSE_vec_correct = RMSE_vec_correct,
# RMSE_vec_wrong = RMSE_vec_wrong
))
}
sim.study_right_wrong <- function(n_obs,n_events_req,rate,t_change, max_time =2,
n.sims, sims = 10750,burn_in = 750, timescale, lambda, time_seq){
param_true <- list()
param_true$lambda <- matrix(rate, nrow = 1)
param_true$changepoint <- matrix(t_change, nrow = 1)
if(any(is.na(t_change))){
num.breaks = 0
}else{
num.breaks <- length(t_change)
}
n.events <- rep(NA, n.sims)
avg.Haz <- array(NA, dim = c(n.sims, 7))
avg.change <- array(NA, dim = c(n.sims, 7))
ISSE_vec <- RMSE_vec <- RSt_err <- matrix(ncol = 10, nrow = n.sims)
mod_param <- c("weibull", "lnorm", "gamma", "gompertz", "llogis", "exponential", "gengamma")
# True_St <- get_Surv(param_true, chng.num = "all", time = time_seq)[,1]
# True_RSt <- sfsmisc::integrate.xy(x = time_seq, True_St)
for(i in 1:n.sims){
print(paste0("Sim ",i," of ",n.sims))
Sys.sleep(1 / n.sims)
n_events <- 0
df <- gen_piece_df(n_obs = n_obs,n_events_req = n_events_req,
num.breaks = num.breaks,rate = rate ,
t_change = t_change, max_time = max_time)
df$status2 <- 1
surv.obj.df <- survfit(Surv(time_event ,status2)~1, data = df)
summary_km <- summary(surv.obj.df, t = time_seq)
True_St <- summary_km[["surv"]]
time_seq_km <- summary_km[["time"]]
#True_St <- get_Surv(param_true, chng.num = "all", time = time_seq)[,1]
True_RSt <- sfsmisc::integrate.xy(x = time_seq_km, True_St)
n_events <- df$status
n.events[i] <- sum(df$status == 1)
#tryCatch({
if(any(is.na(t_change))){
num_breaks_correct <- NA
num_breaks_wrong1 <- 1
num_breaks_wrong2 <- 2
}else{
num_breaks_correct <- length(t_change)
num_breaks_wrong1 <- length(t_change)-1
num_breaks_wrong2 <- length(t_change)+1
}
if(!is.na(num_breaks_correct)){
mod_correct <- gibbs.sampler(df, num.breaks = num_breaks_correct, n.iter =sims, burn_in = burn_in, num.chains = 2)
mod_correct_St <- rowMeans(get_Surv(mod_correct, time = time_seq_km))
}else{
mod_correct_St <- pexp(time_seq_km, sum(df$status)/sum(df$time), lower.tail = FALSE)
}
if(!is.na(num_breaks_wrong1)){
mod_wrong1 <- gibbs.sampler(df, num.breaks = num_breaks_wrong1, n.iter =sims, burn_in = burn_in, num.chains = 2) # Too few
mod_wrong1_St <- rowMeans(get_Surv(mod_wrong1, time = time_seq_km))
}else{
mod_wrong1_St <- pexp(time_seq_km, sum(df$status)/sum(df$time), lower.tail = FALSE)
}
if(!is.na(num_breaks_wrong2)){
mod_wrong2 <- gibbs.sampler(df, num.breaks= num_breaks_wrong2, n.iter =sims, burn_in = burn_in, num.chains = 2) # Too many
mod_wrong2_St <- rowMeans(get_Surv(mod_wrong2, time = time_seq_km))
}else{
mod_wrong2_St <- pexp(time_seq_km, sum(df$status)/sum(df$time), lower.tail = FALSE)
}
mod_correct_RSt <- sfsmisc::integrate.xy(x = time_seq_km, mod_correct_St)
mod_wrong1_RSt <- sfsmisc::integrate.xy(x = time_seq_km, mod_wrong1_St)
mod_wrong2_RSt <- sfsmisc::integrate.xy(x = time_seq_km, mod_wrong2_St)
ISSE_vec[i,1] <- sfsmisc::integrate.xy(x = time_seq_km, fx = (True_St-mod_correct_St)^2)
ISSE_vec[i,2] <- sfsmisc::integrate.xy(x = time_seq_km, fx = (True_St-mod_wrong1_St)^2)
ISSE_vec[i,3] <- sfsmisc::integrate.xy(x = time_seq_km, fx = (True_St-mod_wrong2_St)^2)
RMSE_vec[i,1] <- (mod_correct_RSt-True_RSt)^2
RMSE_vec[i,2] <- (mod_wrong1_RSt-True_RSt)^2
RMSE_vec[i,3] <- (mod_wrong2_RSt-True_RSt)^2
RSt_err[i,1]<- abs(mod_correct_RSt-True_RSt)/True_RSt
RSt_err[i,2]<- abs(mod_wrong1_RSt-True_RSt)/True_RSt
RSt_err[i,3]<- abs(mod_wrong2_RSt-True_RSt)/True_RSt
if(RSt_err[i,1] > 1 ){
plot(time_seq_km, y = True_St)
lines(time_seq_km,y = mod_correct_St, col = "red" )
}
for(b in 1:length(mod_param)){
fit.parm <- flexsurvreg(formula = Surv(time, status) ~ 1, data = df, dist=mod_param[b])
mod_St <- summary(fit.parm, t = time_seq_km)[[1]]$est
mod_RSt <- sfsmisc::integrate.xy(x = time_seq_km, fx = mod_St)
ISSE_vec[i,3+b] <- sfsmisc::integrate.xy(x = time_seq_km, fx = (True_St-mod_St)^2)
RMSE_vec[i,3+b] <- (mod_RSt-True_RSt)^2
RSt_err[i,3+b]<- abs(mod_RSt-True_RSt)/True_RSt
}
if(RSt_err[i,1] > 1 ){
fit.parm <- flexsurvreg(formula = Surv(time, status) ~ 1, data = df, dist=mod_param[1])
mod_St <- summary(fit.parm, t = time_seq_km)[[1]]$est
plot(time_seq_km, y = True_St)
lines(time_seq_km,y = mod_correct_St, col = "red" )
lines(time_seq_km,y = mod_St, col = "blue" )
}
}
if(any(RSt_err[,1] < 0)){
RSt_err <- RSt_err[-which(RSt_err[,1] < 0),]
}
return(list( ISSE_vec = ISSE_vec,
RMSE_vec = RMSE_vec,
RSt_err = RSt_err,
change_num = c(num_breaks_correct,num_breaks_wrong1,num_breaks_wrong2)))
}
#No Changepoint
rate1<- c(0.25)
rate2<- c(0.5)
rate3<- c(0.75)
sample_vec <- c(100,200)
censor_vec <- c(0,0.5)
rate_list <- list()
for(i in 1:3){
current_rate <-get(paste0("rate",i))
rate_list[[i]] <- current_rate
}
sims_eval <- 500
for(q in 1:length(censor_vec) ){
for(j in 1:length(sample_vec)){
for(i in 1:length(rate_list)){
current_name <- paste0("Collapsing_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
assign(current_name, sim.study_right_wrong(n_obs = sample_vec[j],
n_events_req= round(sample_vec[j]*(1-censor_vec[q])),
rate = rate_list[[i]],
max_time = 2,
t_change = NA,
n.sims = sims_eval,
sims =2500,
burn_in = 750,
timescale = "months",
lambda = 1,
time_seq = c(0:15)))
saveRDS(get(current_name), file = paste0(pathway,current_name,".rds"))
}
}
}
#Other Parameters
rate1 <- c(0.5,0.75)
rate2 <- c(0.25,0.75)
rate3 <- c(0.75,0.5)
rate4 <- c(0.75,0.25)
rate5 <- c(0.25,0.5,0.75)
rate6 <- c(0.75,0.5,0.25)
rate7 <- c(0.75,0.2,0.75)
rate8 <- c(0.2,0.75,0.2)
time1 <- c(0.5,1)
censor_vec <- c(0) #c(0,0.33)
sample_vec <- 300#c(300,500,1000)
rate_list <- list()
for(i in 1:8){
current_rate <-get(paste0("rate",i))
rate_list[[i]] <- current_rate
}
for(q in 1:length(censor_vec) ){
for(j in 1:length(sample_vec)){
for(i in 1:length(rate_list)){
current_name <- paste0("Collapsing_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
assign(current_name, sim.study_right_wrong(n_obs = sample_vec[j],
n_events_req= round(sample_vec[j]*(1-censor_vec[q])),
rate = rate_list[[i]],
max_time = 2,
t_change = c(0.5,1)[1:(length(rate_list[[i]])-1)],
n.sims = sims_eval,
sims =2500,
burn_in = 750,
timescale = "months",
lambda = 1,
time_seq = c(0:15)))
saveRDS(get(current_name), file = paste0(pathway,current_name,".rds"))
}
}
}
i = 8; q <- 1; j<-1
current_name <- paste0("Collapsing_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
assign(current_name, sim.study_right_wrong(n_obs = sample_vec[j],
n_events_req= round(sample_vec[j]*(1-censor_vec[q])),
rate = rate_list[[i]],
max_time = 2,
t_change = c(0.5,1)[1:(length(rate_list[[i]])-1)],
n.sims = sims_eval,
sims =2500,
burn_in = 750,
timescale = "months",
lambda = 1,
time_seq = c(0:15)))
saveRDS(get(current_name), file = paste0(pathway,current_name,".rds"))
undebug(sim.study_right_wrong)
for(q in 1:length(censor_vec) ){
for(j in 1:length(sample_vec)){
for(i in 1:length(rate_list)){
current_name <- paste0("Collapsing_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
print(colMeans(get(current_name)[["RSt_err"]][,1:3]))
#saveRDS(get(current_name), file = paste0(pathway,current_name,".rds"))
}
}
}
c(num_breaks_correct,num_breaks_wrong1,num_breaks_wrong2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.